# Author: Ivan Canay | Northwestern University | March 2023
# Copyright (c) 2023 Northwestern University. All rights reserved.
#-------------------------------------------------------------------
# X-plain: Computes the simulations for non-ignorable clusters paper 
#------------------------------------------------------------------
rm(list=ls(all=TRUE))               # Clear all variables
source("NICS.Functions.R")
library(xtable)

#-------------------------------------------------------------------
# Parametrics provided in the Script 
args <- commandArgs(trailingOnly = TRUE);
myseed = as.numeric(args[1]);
design = as.numeric(args[2]);			# design: 1-2
car.design = as.numeric(args[3]);			# design: 1-2
G = as.numeric(args[4]);				# number of clusters
Nmax = as.numeric(args[5]); 			# Maximum number of possible obs within cluster
 # myseed=1111
 # design=2;
 # car.design=1;
 # G=5000;
 # Nmax=500;
 MC=5000;
 alpha=0.05;
 mu.1=0;
#-------------------------------------------------------------------

# GENERATE DATA and SET PARAMETERS
# - - - - - - - - - - - - - - - - -
dir.create("Tables")
nametext <-paste("Tables/G",G,Nmax,"dgn",design,"car",car.design,myseed,"txt",sep=".");
dir.create("Latex")
nameLaTeX<-paste("Latex/G",G,Nmax,"dgn",design,"car",car.design,myseed,"tex",sep=".");
# dir.create("RData")
# Rname 	 <-paste("RData/NICS",G,Nmax,"design",design,myseed,"Rdata",sep=".");
set.seed(myseed);

# Set all coverage probabilities to zero (12 cases)
hat.theta.1 = matrix(0,MC,12);
hat.theta.2 = matrix(0,MC,12);
se.theta.1 = matrix(0,MC,12);
se.theta.2 = matrix(0,MC,12);
cov.CI.1 = matrix(0,12,1);
cov.CI.2 = matrix(0,12,1);
max.Ng = matrix(0,MC,1);

# Compute the true values of thetas
	max.support = Nmax/10-1;
	true.theta = pop.theta(max.support,design);
	true.theta.12 = rbind(true.theta,true.theta,true.theta);

time1 <- proc.time()
time3 <- proc.time()

for (j in 1:MC){
	# Generate cluster sizes, samples, and data
	cluster.sizes   = gen.cluster.sizes(G,max.support);
	cluster.samples = gen.cluster.samples(cluster.sizes);
	data = gen.data(cluster.samples,design,car.design,mu.1);
	max.Ng[j] = max(cluster.sizes[,4]); 

	# Compute the estimated thetas and standard errors
	estimates = est.theta.diff(data,G,car.design);
	hat.theta.1[j,]= t(estimates$hat.theta[,1]);
	hat.theta.2[j,]= t(estimates$hat.theta[,2]);
	se.theta.1[j,] = sqrt(G)*t(estimates$st.dev[,1]);
	se.theta.2[j,] = sqrt(G)*t(estimates$st.dev[,2]);

	# Compute Confidence Intervals
	cv = qnorm(1-alpha/2);
	ind.conf.U.1 = (true.theta.12[,1] <= estimates$hat.theta[,1] + cv*estimates$st.dev[,1]);
	ind.conf.L.1 = (true.theta.12[,1] >= estimates$hat.theta[,1] - cv*estimates$st.dev[,1]);	
	ind.conf.U.2 = (true.theta.12[,2] <= estimates$hat.theta[,2] + cv*estimates$st.dev[,2]);
	ind.conf.L.2 = (true.theta.12[,2] >= estimates$hat.theta[,2] - cv*estimates$st.dev[,2]);

	cov.CI.1 = cov.CI.1 + cbind((ind.conf.U.1)*(ind.conf.L.1))/MC;
	cov.CI.2 = cov.CI.2 + cbind((ind.conf.U.2)*(ind.conf.L.2))/MC;

	# Report Iterations on Screen for monitoring
	if (floor(10*j/MC)==10*j/MC){
    cat("% completed = ", j/MC*100, "\n")
	  time4 <- proc.time();
    cat("Time Last round (min) = ", (time4[3]-time3[3])/60, "\n")
    time3 <-proc.time()
	} 
}
time2 <- proc.time();
cat(sprintf("Elapsed Time (min): %1.3f",(time2[3]-time1[3])/60), "\n");

print(cov.CI.1)
print(cov.CI.2)

## Display Information
basics_a <- c(G,Nmax,design,car.design,alpha,MC);
names(basics_a)<- c("   G","   Nmax", "Design", "CAR","alpha", "MC simu");

## Compute average of thetas and se 
mean.theta.1  = cbind(colMeans(hat.theta.1));
mean.theta.2  = cbind(colMeans(hat.theta.2));
mean.se.1  = cbind(colMeans(se.theta.1));
mean.se.2  = cbind(colMeans(se.theta.2));

results = cbind(true.theta.12,mean.theta.1,mean.theta.2,mean.se.1,mean.se.2,cov.CI.1,cov.CI.2);
colnames(results)<-c("true 1", "true 2", "thetahat.1", "thetahat.2", " sigma-1", "sigma-2", "Cov 1", "Cov 2");
#tablenames = c("       ", "true 1", "true 2    ", "thetahat.1  ", "thetahat.2   ", " se 1    ", "   se 2   ", "  Cov 1 ", "  Cov 2  ");

# Save LaTeX table
# VERY IMPORTANT!!! row names cannot be duplicates, so make sure there are different "spaces" in each 
# name that is an empty name. 
x.col2 = c("$Bb(1,1)$","$Bb(0.4,0.4)$","$Bb(10,50)$","$|t|(1.5)$","$Bb(1,1)$","$Bb(0.4,0.4)$",
	"$Bb(10,50)$","$|t|(1.5)$","$Bb(1,1)$","$Bb(0.4,0.4)$","$Bb(10,50)$","$|t|(1.5)$");
X.results<-data.frame(x.col2,round(results, 4));
rownames(X.results)<-c("$Ng$",""," ","   ","$10$","    ","     ","      ","$gamma Ng$","       ","        ","         ");

print(X.results)
X.results<-xtable(X.results,digits=4);
print(X.results, type="latex", file=nameLaTeX);

print(results)

# # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Saving information to a text file
out <- file(nametext,"wt");   # Choose name of the file here
cat("---------------------------------------------------------------------------------------------------------------------", "\n",file=out);
cat(date(), "\n",file=out)
cat("Seed of the round = ", myseed , "\n",file=out)
cat("Design used       = ", design , "\n",file=out)
cat("CAR Design used   = ", car.design , "\n",file=out)
cat("----------------------------------------------- Parameters ----------------------------------------------------------", "\n",file=out);
cat(names(basics_a),sep="   ","\n",file=out);
cat(formatC(basics_a,digits=2,format="f"),sep="   ", "\n",file=out);
cat("\n",file=out)
cat("--------------------------------- Summary of maximum of Ng for 10|t(1.5)|+10 dist ----------------------------------------", "\n",file=out);
cat(formatC(summary(max.Ng),digits=2,format="f"),sep="   ", "\n",file=out);
cat("\n",file=out)
cat("---------------------------------------- MAIN RESULTS ----------------------------------------------------", "\n",file=out);
#cat(tablenames,sep="               ","\n",file=out);
write.table(format(results, drop0trailing=FALSE),file=out,quote=F,sep='\t\t',row.names=T,col.names=T)
cat("\n",file=out)
cat("---------------------------------------------------------------------------------------------------------------------", "\n",file=out);
time2 <- proc.time();
cat(sprintf("Elapsed Time (min): %1.3f",(time2[3]-time1[3])/60), "\n",file=out);
cat("---------------------------------------------------------------------------------------------------------------------", "\n",file=out);
cat(" Ng has 4 DGPS: 1 (BB flat) 2 (BB smiley face) 3 (BB bell shaped, long tail) and 4 is |t(1.5)| ", "\n",file=out);
cat(" Mg simulated in three alternative ways: 1 Mg=Ng, 2 Mg=10, 3 Mg=prop to Ng", "\n",file=out);
cat(" Design 1: Zg is i.i.d | Design 2: Zg depends on Ng ", "\n",file=out);
cat(" CAR Design 0: SRS | 1: 10 strata on Zg2 | 2: 5 strata on Zg2 times (Ng>median) (depends on Ng) ", "\n",file=out);
cat("---------------------------------------------------------------------------------------------------------------------", "\n",file=out);
close(out);

#save(list = ls(all = TRUE), file = Rname)